// -----------------------------------------------------------------------------
// Builds ./data.txt from two derived CSV files:
//   1. wage_types.csv -> (mid, cluster) lookup, stored as ./wage_types.dta
//   2. psid_fam.csv   -> 2002 sample rows with age <= 12, merged with the
//                        lookup, reshaped into the variables listed in
//                        `outvars' and written with -outfile-.
// Missing values in every exported variable are recoded to -1 before export.
// -----------------------------------------------------------------------------

// ---- step 1: mother wage-type lookup ----------------------------------------
import delimited using ../../../data/data_derived/wage_types.csv, clear
keep mid cluster
save ./wage_types, replace

// ---- step 2: load the family file and restrict the sample -------------------
import delimited using ../../../data/data_derived/psid_fam.csv, clear

// Same rows survive as with "keep if year==2002 & ind_not_sample==0 & age<=12":
// a missing age compares as larger than 12 in Stata, so it is dropped here too.
drop if year!=2002 | ind_not_sample!=0 | age>12

// ---- step 3: attach the wage-type cluster -----------------------------------
// Unmatched master rows are kept (cluster stays missing); using-only rows are
// discarded and no _merge variable is left behind.
merge m:1 mid using ./wage_types, keep(master match) nogenerate

// ---- step 4: rename raw inputs to the names used in the export --------------
rename (curr_married p_avg hhinvest) (married p p_g)
generate g = p_g/p

rename (m_wage f_wage) (W_m W_f)

// wage multiplied by the corresponding tau_* variable, per parent
generate W_m_tau_m = W_m*tau_m
generate W_f_tau_f = W_f*tau_f

// child-care block: expenditure, price and implied quantity
rename chcare_exp P_c_Y_c
// center-based care price scaled by 33*52
// NOTE(review): presumably converts an annual price at 33 hours/week into an
// hourly price - confirm against the source of p_yocent_e_cps_cpkt.
generate P_c = p_yocent_e_cps_cpkt/(33*52)
generate Y_c = P_c_Y_c/P_c

// pbar_X: W_m_tau_m + p_g + P_c_Y_c, plus W_f_tau_f for married rows.
// Kept as separate steps so intermediate storage matches the original exactly.
generate pbar_X = W_m_tau_m+p_g+P_c_Y_c
replace pbar_X = pbar_X+W_f_tau_f if married==1
// a zero total is treated as missing
replace pbar_X = . if pbar_X==0

// W: mother's wage, plus father's wage for married rows
generate W = W_m
replace W = W+W_f if married==1

// ---- step 5: household composition, education, hours, age, experience -------
rename (num_0_12 num_0_5) (nkid nkid_0_5)
rename (med_cat fed_cat) (edu_m edu_f)

// hours variables divided by 52
generate hrs_m = m_hrs/52
generate hrs_f = f_hrs/52

rename (m_age f_age m_exper f_exper) (age_m age_f expr_m expr_f)

// ---- step 6: drop rows lacking required inputs ------------------------------
// Equivalent to keeping rows where all six variables are non-missing.
drop if W_m==. | p==. | P_c==. | nkid==. | nkid_0_5==. | edu_m==.
// Father's wage and education are required unless married==0.
drop if (W_f==. | edu_f==.) & married!=0

// ---- step 7: fill gaps in father's age and experience -----------------------
// fall back to the mother's age when the father's age is missing
replace age_f = age_m if married==1 & age_f==.

// father's experience, where missing: age minus 18/20/22/24 depending on which
// fed_* education indicator is set
replace expr_f = age_f-18*(fed_hsd|fed_hs)-20*(fed_scoll)-22*(fed_coll)-24*(fed_postcol) if married==1 & expr_f==.

// ---- step 8: row identifier and export --------------------------------------
generate id = _n

local outvars id married age W_m_tau_m W_f_tau_f p_g P_c_Y_c pbar_X W_m W_f p P_c tau_m tau_f g Y_c hrs_m hrs_f W nkid nkid_0_5 edu_m edu_f cluster age_m age_f expr_m expr_f

// encode missing as -1 in every exported column
foreach x of local outvars {
    replace `x' = -1 if `x'==.
}

outfile `outvars' using ./data.txt, wide replace
